
# data uitls
pacman::p_load(data.table, tidyverse, rio, boot, magrittr)
#  panel / factor model packages
pacman::p_load(synthdid, fixest, kernlab, ebal, grf, npcausal, estimatr)
# data uitls
set.seed(42)

if (Sys.info()[['user']] == "alal") {
  root = "/home/alal/Dropbox/1_Research/ElecAdminFunding/ElecAdminFundingReplication"
} else {
  root = "~/Dropbox/ElecAdminFunding/ElecAdminFundingReplication"
}

##
# Set up data and estimation functions
##

# Set up the panel analysis data
raw = rio::import(file.path(root, "/modified_data/ctcl.dta")) %>% setDT
raw[, treated := ifelse((amount > 0) & policy_year == 2020, 1, 0)]
raw[policy_year == 2020, .N, treated]
raw[, ever_treated := max(treated), fips]

# Set up the cross-sectional analysis data
demvs_wide = dcast(raw[, .(fips, policy_year, vs_dem_pres)],
          fips ~ policy_year, value.var = "vs_dem_pres") %>%
  set_colnames(c("fips", paste0("vs_dem_", seq(1992, 2020, 4)))) %>%
  setkey('fips')
turnout_wide = dcast(raw[, .(fips, policy_year, turnout_pres)],
          fips ~ policy_year, value.var = "turnout_pres") %>%
  set_colnames(c("fips", paste0("turnout_", seq(1992, 2020, 4)))) %>%
  setkey('fips')

#
kv = c("fips", "treated")
wide_data = raw[policy_year == 2020, ..kv] %>%
  setkey('fips') %>%
  .[demvs_wide] %>%
  .[turnout_wide]

# Set up a function for bootstrapping entropy balancing estimates
booter = \(data, i, yn, xn) {
    df = data[i, ]
    ewt = ebalance(df[[wn]], as.matrix(df[, ..xn]))$w
    mean(df[treated == 1, get(yn)]) - weighted.mean(df[treated == 0, get(yn)], ewt)
}


##
# All estimators
##

# Various synthetic control and synth diff-in-diff 
# estimates of the effect of CTCL grant on dem vote share and turnout
demvs_panel = panel_estimate(raw, "fips", "policy_year", "treated", "vs_dem_pres",
  reps = 1000, methods = c("SC (Regularized)", "SDID (No Intercept)"))$summary_table
tur_panel = panel_estimate(raw, "fips", "policy_year", "treated", "turnout_pres",
  reps = 1000, methods = c("SC (Regularized)", "SDID (No Intercept)"))$summary_table


# Define variables necessary to run all cross-sectional estimators
n = nrow(wide_data)
y1n = "vs_dem_2020"; y2n =  "turnout_2020"; wn = "treated"
xn = setdiff(colnames(wide_data), c("fips", y1n, y2n, wn))
lagged_y1 = paste0("vs_dem_", seq(1992, 2016, 4))
lagged_y2 = paste0("turnout_", seq(1992, 2016, 4))
wide_data = wide_data[order(get(wn))]

# Lagged dependent variable regression estimates of
# effect of grant on dem vote share and turnout
demvs_est_lm = feols(.[y1n] ~ .[wn] + .[lagged_y1], wide_data) %>% summary %>% .$coeftable %>% .[2,1:2]
tur_est_lm   = feols(.[y2n] ~ .[wn] + .[lagged_y2], wide_data) %>% summary %>% .$coeftable %>% .[2,1:2]

# Lagged dependent variable entropy balancing bootstrap
# estimate of effect of grant on dem vote share and turnout
demvs_est_eb = c(
    booter(wide_data, 1:nrow(wide_data), y1n, lagged_y1),
    boot(wide_data, booter, R = 1000,
         yn = y1n, xn = lagged_y1, parallel = "multicore", ncpus = 10) %>%
    .$t %>% sd)
tur_est_eb = c(booter(wide_data, 1:nrow(wide_data), y2n, lagged_y2),
    boot(wide_data, booter, R = 1000,
         yn = y2n, xn = lagged_y2, parallel = "multicore", ncpus = 10) %>%
    .$t %>% sd)

# Lagged dependent variable causal forest estimate of 
# effect of grant on dem vote share and turnout
demvs_est_aipw = causal_forest(wide_data[,..lagged_y1] %>% as.matrix,
                     wide_data[[y1n]] %>% as.numeric,
                     wide_data[[wn]] %>% as.numeric,
                     tune.parameters = "all") %>%
    average_treatment_effect(target.sample = "treated")
tur_est_aipw = causal_forest(wide_data[,..lagged_y2] %>% as.matrix,
                    wide_data[[y2n]] %>% as.numeric,
                    wide_data[[wn]] %>% as.numeric,
                    tune.parameters = "all"
                     ) %>%
    average_treatment_effect(target.sample = "treated")

# Lagged dependent variable super learner estimate of 
# effect of grant on dem vote share and turnout
superlearners = c("SL.gam", "SL.glmnet", "SL.earth", "SL.ranger", "SL.xgboost")
demvs_est_aipw_sl = att(x = wide_data[,..lagged_y1] %>% as.matrix,
                   y = wide_data[[y1n]] %>% as.numeric,
                   a = wide_data[[wn]] %>% as.numeric,
                   sl.lib = superlearners)$res[3, 1:3]
tur_est_aipw_sl = att(x = wide_data[,..lagged_y2] %>% as.matrix,
                   y = wide_data[[y2n]] %>% as.numeric,
                   a = wide_data[[wn]] %>% as.numeric,
                   sl.lib = superlearners)$res[3, 1:3]


##
# Get sample sizes and save all of the estimation output
##

# Get the number of observations, units, and treated units
get_Ns = function(data, id_name, y_name, w_name){
  d = select(data, all_of(c(id_name, y_name, w_name)))
  colnames(d) = c("id", "y", "w")
  obs = nrow(d)
  counties = nrow(distinct(d, id))
  treat_counties = nrow(distinct(filter(d, w==1), id))
  return(c(treat_counties, counties, obs))
}
obs_panel_vs = get_Ns(raw, id_name="fips", y_name="vs_dem_pres", w_name="treated")
obs_wide_vs = get_Ns(wide_data, id_name="fips", y_name="vs_dem_2020", w_name="treated")
obs_panel_tur = get_Ns(raw, id_name="fips", y_name="turnout_pres", w_name="treated")
obs_wide_tur = get_Ns(wide_data, id_name="fips", y_name="turnout_2020", w_name="treated")

# Save estimated effects on dem vote share
demvs_table = cbind(
  demvs_panel,
  demvs_est_lm,
  demvs_est_eb,
  demvs_est_aipw,
  t(demvs_est_aipw_sl[,-1])
  ) %>% set_rownames(c("Est", "SE")) %>%
  set_colnames(c("SDID_NoInt", "SC_Reg", "OLS", "Ebal", "AIPW_Forest", "AIPW_SL"))

# Save estimated effects on turnout
tur_table = cbind(
  tur_panel,
  tur_est_lm,
  tur_est_eb,
  tur_est_aipw,
  t(tur_est_aipw_sl[,-1])
  ) %>% set_rownames(c("Est", "SE")) %>%
  set_colnames(c("SDID_NoInt", "SC_Reg", "OLS", "Ebal", "AIPW_Forest", "AIPW_SL"))

# Save the table with observation numbers
obs_table = data.frame(obs_panel_vs, obs_wide_vs, obs_panel_tur, obs_wide_tur)

# Combine and save all output
save(demvs_table, tur_table, obs_table,
  file = file.path(root, "tmp/robustness_tables.RData"))
